In [3]:
import pandas as pd
In [4]:
import warnings
warnings.filterwarnings('ignore')
In [5]:
crp_rcmnd = pd.read_csv("Crop_recommendation.csv")
In [6]:
crp_rcmnd
Out[6]:
N P K temperature humidity ph rainfall label
0 90 42 43 20.879744 82.002744 6.502985 202.935536 rice
1 85 58 41 21.770462 80.319644 7.038096 226.655537 rice
2 60 55 44 23.004459 82.320763 7.840207 263.964248 rice
3 74 35 40 26.491096 80.158363 6.980401 242.864034 rice
4 78 42 42 20.130175 81.604873 7.628473 262.717340 rice
... ... ... ... ... ... ... ... ...
2195 107 34 32 26.774637 66.413269 6.780064 177.774507 coffee
2196 99 15 27 27.417112 56.636362 6.086922 127.924610 coffee
2197 118 33 30 24.131797 67.225123 6.362608 173.322839 coffee
2198 117 32 34 26.272418 52.127394 6.758793 127.175293 coffee
2199 104 18 30 23.603016 60.396475 6.779833 140.937041 coffee

2200 rows × 8 columns

Describing the data¶

In [7]:
crp_rcmnd.dtypes
Out[7]:
N                int64
P                int64
K                int64
temperature    float64
humidity       float64
ph             float64
rainfall       float64
label           object
dtype: object
In [8]:
crp_rcmnd.columns
Out[8]:
Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')
In [9]:
crp_rcmnd.index
Out[9]:
RangeIndex(start=0, stop=2200, step=1)
In [10]:
crp_rcmnd.describe()
Out[10]:
N P K temperature humidity ph rainfall
count 2200.000000 2200.000000 2200.000000 2200.000000 2200.000000 2200.000000 2200.000000
mean 50.551818 53.362727 48.149091 25.616244 71.481779 6.469480 103.463655
std 36.917334 32.985883 50.647931 5.063749 22.263812 0.773938 54.958389
min 0.000000 5.000000 5.000000 8.825675 14.258040 3.504752 20.211267
25% 21.000000 28.000000 20.000000 22.769375 60.261953 5.971693 64.551686
50% 37.000000 51.000000 32.000000 25.598693 80.473146 6.425045 94.867624
75% 84.250000 68.000000 49.000000 28.561654 89.948771 6.923643 124.267508
max 140.000000 145.000000 205.000000 43.675493 99.981876 9.935091 298.560117
In [11]:
crp_rcmnd.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB
In [12]:
crp_rcmnd.mean()
Out[12]:
N               50.551818
P               53.362727
K               48.149091
temperature     25.616244
humidity        71.481779
ph               6.469480
rainfall       103.463655
dtype: float64

Viewing and Selecting¶

In [13]:
crp_rcmnd.head()
Out[13]:
N P K temperature humidity ph rainfall label
0 90 42 43 20.879744 82.002744 6.502985 202.935536 rice
1 85 58 41 21.770462 80.319644 7.038096 226.655537 rice
2 60 55 44 23.004459 82.320763 7.840207 263.964248 rice
3 74 35 40 26.491096 80.158363 6.980401 242.864034 rice
4 78 42 42 20.130175 81.604873 7.628473 262.717340 rice
In [14]:
crp_rcmnd.tail()
Out[14]:
N P K temperature humidity ph rainfall label
2195 107 34 32 26.774637 66.413269 6.780064 177.774507 coffee
2196 99 15 27 27.417112 56.636362 6.086922 127.924610 coffee
2197 118 33 30 24.131797 67.225123 6.362608 173.322839 coffee
2198 117 32 34 26.272418 52.127394 6.758793 127.175293 coffee
2199 104 18 30 23.603016 60.396475 6.779833 140.937041 coffee
In [15]:
crp_rcmnd.loc[175]
Out[15]:
N                     77
P                     52
K                     17
temperature    24.863749
humidity       65.742005
ph                5.7148
rainfall       75.822705
label              maize
Name: 175, dtype: object
In [16]:
crp_rcmnd.label
Out[16]:
0         rice
1         rice
2         rice
3         rice
4         rice
         ...  
2195    coffee
2196    coffee
2197    coffee
2198    coffee
2199    coffee
Name: label, Length: 2200, dtype: object
In [17]:
crp_rcmnd[crp_rcmnd.label == "mango"]
Out[17]:
N P K temperature humidity ph rainfall label
1100 2 40 27 29.737700 47.548852 5.954627 90.095869 mango
1101 39 24 31 33.556956 53.729798 4.757115 98.675276 mango
1102 21 26 27 27.003155 47.675254 5.699587 95.851183 mango
1103 25 22 25 33.561502 45.535566 5.977414 95.705259 mango
1104 0 21 32 35.898556 54.259642 6.430139 92.197217 mango
... ... ... ... ... ... ... ... ...
1195 19 38 26 31.484517 48.779263 4.525722 93.172220 mango
1196 21 21 30 27.698193 51.415932 5.403908 100.772070 mango
1197 22 18 33 30.412358 52.481006 6.621624 93.923759 mango
1198 31 20 30 32.177520 54.013527 6.207496 91.887661 mango
1199 18 26 31 32.611261 47.749165 5.418475 91.101908 mango

100 rows × 8 columns

In [18]:
crp_rcmnd[crp_rcmnd.label == "watermelon"]
Out[18]:
N P K temperature humidity ph rainfall label
1300 119 25 51 26.473302 80.922544 6.283818 53.657426 watermelon
1301 119 19 55 25.187800 83.446217 6.818261 46.874209 watermelon
1302 105 30 50 25.299547 81.775276 6.376201 57.041471 watermelon
1303 114 8 50 24.746313 88.308663 6.581588 57.958261 watermelon
1304 93 22 52 26.587407 81.325632 6.932740 41.875400 watermelon
... ... ... ... ... ... ... ... ...
1395 97 12 47 25.287846 89.636679 6.765095 58.286977 watermelon
1396 110 7 45 26.638386 84.695469 6.189214 48.324286 watermelon
1397 96 18 50 25.331045 84.305338 6.904242 41.532187 watermelon
1398 83 23 55 26.897502 83.892415 6.463271 43.971937 watermelon
1399 120 24 47 26.986037 89.413849 6.260839 58.548767 watermelon

100 rows × 8 columns

In [19]:
crp_rcmnd[crp_rcmnd.ph <= 4.0]
Out[19]:
N P K temperature humidity ph rainfall label
500 3 49 18 27.910952 64.709306 3.692864 32.678919 mothbeans
521 22 49 22 28.234947 61.562052 3.711059 72.666664 mothbeans
526 8 60 18 31.216300 46.018682 3.808429 53.120528 mothbeans
529 36 43 24 27.094006 43.653054 3.510404 41.537495 mothbeans
535 11 45 19 28.700121 44.359648 3.828031 44.116221 mothbeans
537 17 57 20 28.506779 45.200945 3.793575 66.176146 mothbeans
557 4 46 15 31.012749 62.403925 3.504752 63.771924 mothbeans
561 35 51 17 28.799292 49.842134 3.558823 40.855347 mothbeans
582 19 51 25 26.804744 48.239914 3.525366 43.878020 mothbeans
599 16 51 21 31.019636 49.976752 3.532009 32.812965 mothbeans
In [20]:
pd.crosstab(crp_rcmnd.N ,crp_rcmnd.label)
Out[20]:
label apple banana blackgram chickpea coconut coffee cotton grapes jute kidneybeans ... mango mothbeans mungbean muskmelon orange papaya pigeonpeas pomegranate rice watermelon
N
0 3 0 0 0 4 0 0 1 0 2 ... 3 1 1 0 4 0 1 3 0 0
1 2 0 0 0 3 0 0 1 0 1 ... 3 0 2 0 3 0 3 1 0 0
2 6 0 0 0 2 0 0 1 0 1 ... 4 2 3 0 0 0 1 2 0 0
3 1 0 0 0 2 0 0 2 0 2 ... 2 3 0 0 0 0 3 3 0 0
4 0 0 0 0 0 0 0 2 0 1 ... 2 4 5 0 3 0 1 6 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
134 0 0 0 0 0 0 2 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
135 0 0 0 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
136 0 0 0 0 0 0 2 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
139 0 0 0 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
140 0 0 0 0 0 0 3 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

137 rows × 22 columns

In [21]:
crp_rcmnd.groupby(["label"]).mean()
Out[21]:
N P K temperature humidity ph rainfall
label
apple 20.80 134.22 199.89 22.630942 92.333383 5.929663 112.654779
banana 100.23 82.01 50.05 27.376798 80.358123 5.983893 104.626980
blackgram 40.02 67.47 19.24 29.973340 65.118426 7.133952 67.884151
chickpea 40.09 67.79 79.92 18.872847 16.860439 7.336957 80.058977
coconut 21.98 16.93 30.59 27.409892 94.844272 5.976562 175.686646
coffee 101.20 28.74 29.94 25.540477 58.869846 6.790308 158.066295
cotton 117.77 46.24 19.56 23.988958 79.843474 6.912675 80.398043
grapes 23.18 132.53 200.11 23.849575 81.875228 6.025937 69.611829
jute 78.40 46.86 39.99 24.958376 79.639864 6.732778 174.792798
kidneybeans 20.75 67.54 20.05 20.115085 21.605357 5.749411 105.919778
lentil 18.77 68.36 19.41 24.509052 64.804785 6.927932 45.680454
maize 77.76 48.44 19.79 22.389204 65.092249 6.245190 84.766988
mango 20.07 27.18 29.92 31.208770 50.156573 5.766373 94.704515
mothbeans 21.44 48.01 20.23 28.194920 53.160418 6.831174 51.198487
mungbean 20.99 47.28 19.87 28.525775 85.499975 6.723957 48.403601
muskmelon 100.32 17.72 50.08 28.663066 92.342802 6.358805 24.689952
orange 19.58 16.55 10.01 22.765725 92.170209 7.016957 110.474969
papaya 49.88 59.05 50.04 33.723859 92.403388 6.741442 142.627839
pigeonpeas 20.73 67.73 20.29 27.741762 48.061633 5.794175 149.457564
pomegranate 18.87 18.75 40.21 21.837842 90.125504 6.429172 107.528442
rice 79.89 47.58 39.87 23.689332 82.272822 6.425471 236.181114
watermelon 99.42 17.00 50.22 25.591767 85.160375 6.495778 50.786219
In [22]:
crp_rcmnd.isnull()
Out[22]:
N P K temperature humidity ph rainfall label
0 False False False False False False False False
1 False False False False False False False False
2 False False False False False False False False
3 False False False False False False False False
4 False False False False False False False False
... ... ... ... ... ... ... ... ...
2195 False False False False False False False False
2196 False False False False False False False False
2197 False False False False False False False False
2198 False False False False False False False False
2199 False False False False False False False False

2200 rows × 8 columns

In [23]:
crp_rcmnd.isnull().values.any()
Out[23]:
False

As rainfall in hydroponic condition is not our concern.¶

crp_rcmnd = crp_rcmnd.drop("rainfall", axis =1)

In [24]:
crp_rcmnd
Out[24]:
N P K temperature humidity ph rainfall label
0 90 42 43 20.879744 82.002744 6.502985 202.935536 rice
1 85 58 41 21.770462 80.319644 7.038096 226.655537 rice
2 60 55 44 23.004459 82.320763 7.840207 263.964248 rice
3 74 35 40 26.491096 80.158363 6.980401 242.864034 rice
4 78 42 42 20.130175 81.604873 7.628473 262.717340 rice
... ... ... ... ... ... ... ... ...
2195 107 34 32 26.774637 66.413269 6.780064 177.774507 coffee
2196 99 15 27 27.417112 56.636362 6.086922 127.924610 coffee
2197 118 33 30 24.131797 67.225123 6.362608 173.322839 coffee
2198 117 32 34 26.272418 52.127394 6.758793 127.175293 coffee
2199 104 18 30 23.603016 60.396475 6.779833 140.937041 coffee

2200 rows × 8 columns

In [25]:
crp_rcmnd.sample(frac = 1).head(10)
Out[25]:
N P K temperature humidity ph rainfall label
1453 93 22 48 29.125337 91.522911 6.776988 21.904404 muskmelon
2143 113 33 34 26.003740 62.144510 6.559817 153.477776 coffee
726 49 68 22 28.568406 61.532786 7.127064 63.497263 blackgram
236 57 58 77 18.726494 17.584064 7.978997 81.201765 chickpea
1457 100 14 49 29.488830 91.075742 6.365957 26.019094 muskmelon
1896 28 27 32 28.940997 93.001090 5.764615 191.772309 coconut
2181 101 31 26 26.708975 69.711841 6.861235 158.860889 coffee
1505 32 137 204 22.860066 93.128599 5.824152 117.729673 apple
733 53 67 17 31.776817 69.018529 7.296972 61.468929 blackgram
451 24 73 20 19.637362 32.315289 4.608695 176.413409 pigeonpeas
In [26]:
crp_rcmnd.shape
Out[26]:
(2200, 8)
In [27]:
crp_rcmnd.duplicated().sum()
Out[27]:
0
In [28]:
crp_rcmnd.corr()
Out[28]:
N P K temperature humidity ph rainfall
N 1.000000 -0.231460 -0.140512 0.026504 0.190688 0.096683 0.059020
P -0.231460 1.000000 0.736232 -0.127541 -0.118734 -0.138019 -0.063839
K -0.140512 0.736232 1.000000 -0.160387 0.190859 -0.169503 -0.053461
temperature 0.026504 -0.127541 -0.160387 1.000000 0.205320 -0.017795 -0.030084
humidity 0.190688 -0.118734 0.190859 0.205320 1.000000 -0.008483 0.094423
ph 0.096683 -0.138019 -0.169503 -0.017795 -0.008483 1.000000 -0.109069
rainfall 0.059020 -0.063839 -0.053461 -0.030084 0.094423 -0.109069 1.000000
In [29]:
import seaborn as sns
import matplotlib as plt
import matplotlib.pyplot as plt
import numpy as np
In [30]:
corr = crp_rcmnd.corr()
sns.heatmap(corr, annot=True, cbar=True, cmap= 'coolwarm')
Out[30]:
<AxesSubplot:>
In [31]:
crp_rcmnd = crp_rcmnd.drop("rainfall", axis =1)
In [32]:
crp_rcmnd
Out[32]:
N P K temperature humidity ph label
0 90 42 43 20.879744 82.002744 6.502985 rice
1 85 58 41 21.770462 80.319644 7.038096 rice
2 60 55 44 23.004459 82.320763 7.840207 rice
3 74 35 40 26.491096 80.158363 6.980401 rice
4 78 42 42 20.130175 81.604873 7.628473 rice
... ... ... ... ... ... ... ...
2195 107 34 32 26.774637 66.413269 6.780064 coffee
2196 99 15 27 27.417112 56.636362 6.086922 coffee
2197 118 33 30 24.131797 67.225123 6.362608 coffee
2198 117 32 34 26.272418 52.127394 6.758793 coffee
2199 104 18 30 23.603016 60.396475 6.779833 coffee

2200 rows × 7 columns

In [33]:
crp_rcmnd["label"].unique()
Out[33]:
array(['rice', 'maize', 'chickpea', 'kidneybeans', 'pigeonpeas',
       'mothbeans', 'mungbean', 'blackgram', 'lentil', 'pomegranate',
       'banana', 'mango', 'grapes', 'watermelon', 'muskmelon', 'apple',
       'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
      dtype=object)
In [34]:
crp_rcmnd["label"].value_counts()
Out[34]:
rice           100
maize          100
jute           100
cotton         100
coconut        100
papaya         100
orange         100
apple          100
muskmelon      100
watermelon     100
grapes         100
mango          100
banana         100
pomegranate    100
lentil         100
blackgram      100
mungbean       100
mothbeans      100
pigeonpeas     100
kidneybeans    100
chickpea       100
coffee         100
Name: label, dtype: int64
In [35]:
crp_rcmnd.plot(x = "label", y ="N" , kind= "scatter", figsize=(28,5))
Out[35]:
<AxesSubplot:xlabel='label', ylabel='N'>
In [36]:
crp_rcmnd["P"].plot.hist()
Out[36]:
<AxesSubplot:ylabel='Frequency'>
In [37]:
orange = crp_rcmnd[crp_rcmnd["label"] == "orange"]
In [38]:
len(orange)
Out[38]:
100
In [39]:
orange.head()
Out[39]:
N P K temperature humidity ph label
1600 22 30 12 15.781442 92.510777 6.354007 orange
1601 37 6 13 26.030973 91.508193 7.511755 orange
1602 27 13 6 13.360506 91.356082 7.335158 orange
1603 7 16 9 18.879577 92.043045 7.813917 orange
1604 20 7 9 29.477417 91.578029 7.129137 orange
In [40]:
crp_rcmnd_mean = pd.pivot_table(crp_rcmnd,index=['label'],aggfunc='mean')
In [41]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import random
number_of_colors = 16

col_arr = ["#"+''.join([random.choice('0123456789ABCDEF') for j in range(6)])
             for i in range(number_of_colors)]


phos_summary = crp_rcmnd_mean.sort_values(by='P',ascending=False)

fig = make_subplots(rows=1, cols=2)

top_10 = { 'y': phos_summary['P'][0:10].sort_values().index,
           'x': phos_summary['P'][0:10].sort_values() 
         }

last_10 = { 'y': phos_summary['P'][-10:].sort_values().index,
           'x': phos_summary['P'][-10:].sort_values() 
         }

fig.add_trace(
                go.Bar( top_10,
                       marker_color= random.choice(col_arr),
                       name = 'Most Phosporus Required',
                       orientation = 'h',
                       text = top_10['x']
                      ),
                row=1, col=1
             )

fig.add_trace(
                go.Bar( last_10,
                       marker_color= random.choice(col_arr),
                       name = 'Least Phosporus Required',
                       orientation = 'h',
                       text = last_10['x']
                      ),
                row=1, col=2
             )

fig.update_traces(texttemplate='%{text}',textposition='inside')
fig.update_layout(title='Phosporus',font_size=12)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()
In [42]:
fig = go.Figure()

fig.add_trace( go.Bar( x = crp_rcmnd_mean.index,
                       y = crp_rcmnd_mean['N'],
                      marker_color = 'blue',
                      name = 'Nitrogen'
                     )
             )

fig.add_trace( go.Bar( x = crp_rcmnd_mean.index,
                       y = crp_rcmnd_mean['P'],
                      marker_color = 'light blue',
                      name = 'Phosphorous'
                     )
             )

fig.add_trace( go.Bar( x = crp_rcmnd_mean.index,
                       y = crp_rcmnd_mean['K'],
                      marker_color = 'green',
                      name = 'Potassium'
                     )
             )
fig.update_layout(title='Comparison between N, P, k')
In [43]:
features = crp_rcmnd[['N', 'P','K','temperature', 'humidity', 'ph']]
target = crp_rcmnd['label']
labels = crp_rcmnd['label']
In [44]:
acc = []
model = []

Splitting into train and test data¶

In [45]:
from sklearn.model_selection import train_test_split
Xtrain, Xtest, Ytrain, Ytest = train_test_split(features,target,test_size = 0.2,random_state =2)

Decision Tree¶

In [46]:
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

DecisionTree = DecisionTreeClassifier(criterion="entropy",random_state=2,max_depth=11)

DecisionTree.fit(Xtrain,Ytrain)

predicted_values = DecisionTree.predict(Xtest)
x = metrics.accuracy_score(Ytest, predicted_values)
acc.append(x)
model.append('Decision Tree')
print("DecisionTrees's Accuracy is: ", x*100)

print(classification_report(Ytest,predicted_values))
DecisionTrees's Accuracy is:  95.9090909090909
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        13
      banana       1.00      1.00      1.00        17
   blackgram       0.73      1.00      0.84        16
    chickpea       1.00      1.00      1.00        21
     coconut       1.00      1.00      1.00        21
      coffee       1.00      1.00      1.00        22
      cotton       1.00      1.00      1.00        20
      grapes       1.00      1.00      1.00        18
        jute       0.81      0.89      0.85        28
 kidneybeans       1.00      1.00      1.00        14
      lentil       1.00      0.78      0.88        23
       maize       1.00      1.00      1.00        21
       mango       1.00      1.00      1.00        26
   mothbeans       0.94      0.84      0.89        19
    mungbean       1.00      1.00      1.00        24
   muskmelon       1.00      1.00      1.00        23
      orange       1.00      1.00      1.00        29
      papaya       1.00      0.95      0.97        19
  pigeonpeas       0.89      0.94      0.92        18
 pomegranate       1.00      1.00      1.00        17
        rice       0.79      0.69      0.73        16
  watermelon       1.00      1.00      1.00        15

    accuracy                           0.96       440
   macro avg       0.96      0.96      0.96       440
weighted avg       0.96      0.96      0.96       440

In [47]:
from sklearn.model_selection import cross_val_score
In [48]:
# Cross validation score (Decision Tree)
score = cross_val_score(DecisionTree, features, target,cv=5)
In [49]:
score
Out[49]:
array([0.95227273, 0.95454545, 0.95681818, 0.94772727, 0.95227273])
In [50]:
import pickle
# Dump the trained Naive Bayes classifier with Pickle
DT_pkl_filename = 'DecisionTree.pkl'
# Open the file to save as pkl file
DT_Model_pkl = open(DT_pkl_filename, 'wb')
pickle.dump(DecisionTree, DT_Model_pkl)
# Close the pickle instances
DT_Model_pkl.close()
In [51]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Ytest, predicted_values)

plt.figure(figsize=(15,15))
sns.heatmap(cm, annot=True, fmt=".0f", linewidths=.5, square = True, cmap = 'Blues');
plt.ylabel('Actual label');
plt.xlabel('Predicted label');
all_sample_title = 'Confusion Matrix - score:'+str(accuracy_score(Ytest,predicted_values))
plt.title(all_sample_title, size = 15);
plt.show()
In [52]:
data = pd.DataFrame([[83, 45, 60, 28, 70.3, 7.0]], columns=['N', 'P','K','temperature', 'humidity', 'ph'])
prediction = DecisionTree.predict(data)
print(prediction)
['jute']

Support Vector Machine¶

In [53]:
from sklearn.svm import SVC

SVM = SVC(gamma='auto')

SVM.fit(Xtrain,Ytrain)

predicted_values = SVM.predict(Xtest)

x = metrics.accuracy_score(Ytest, predicted_values)
acc.append(x)
model.append('SVM')
print("SVM's Accuracy is: ", x)

print(classification_report(Ytest,predicted_values))
SVM's Accuracy is:  0.31363636363636366
              precision    recall  f1-score   support

       apple       1.00      0.62      0.76        13
      banana       1.00      0.47      0.64        17
   blackgram       0.67      0.38      0.48        16
    chickpea       1.00      0.24      0.38        21
     coconut       1.00      0.33      0.50        21
      coffee       1.00      0.23      0.37        22
      cotton       1.00      0.50      0.67        20
      grapes       1.00      0.22      0.36        18
        jute       1.00      0.25      0.40        28
 kidneybeans       0.05      1.00      0.09        14
      lentil       0.75      0.13      0.22        23
       maize       1.00      0.10      0.17        21
       mango       1.00      0.15      0.27        26
   mothbeans       1.00      0.21      0.35        19
    mungbean       1.00      0.21      0.34        24
   muskmelon       0.92      0.52      0.67        23
      orange       1.00      0.10      0.19        29
      papaya       1.00      0.37      0.54        19
  pigeonpeas       0.33      0.11      0.17        18
 pomegranate       0.91      0.59      0.71        17
        rice       0.67      0.50      0.57        16
  watermelon       0.80      0.27      0.40        15

    accuracy                           0.31       440
   macro avg       0.87      0.34      0.42       440
weighted avg       0.89      0.31      0.41       440

In [54]:
# Cross validation score (SVM)
score = cross_val_score(SVM,features,target,cv=5)
score
Out[54]:
array([0.48863636, 0.44772727, 0.44090909, 0.475     , 0.475     ])
In [55]:
data = pd.DataFrame([[83, 45, 60, 28, 70.3, 7.0]], columns=['N', 'P','K','temperature', 'humidity', 'ph'])
prediction = DecisionTree.predict(data)
print(prediction)
['jute']

Random Forest¶

In [56]:
from sklearn.ensemble import RandomForestClassifier

RF = RandomForestClassifier(n_estimators=30, max_depth=14, random_state=0)
RF.fit(Xtrain,Ytrain)

predicted_values = RF.predict(Xtest)

x = metrics.accuracy_score(Ytest, predicted_values)
acc.append(x)
model.append('RF')
print("RF's Accuracy is: ", x)

print(classification_report(Ytest,predicted_values))
RF's Accuracy is:  0.975
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        13
      banana       1.00      1.00      1.00        17
   blackgram       0.88      0.88      0.88        16
    chickpea       1.00      1.00      1.00        21
     coconut       1.00      1.00      1.00        21
      coffee       1.00      1.00      1.00        22
      cotton       1.00      1.00      1.00        20
      grapes       1.00      1.00      1.00        18
        jute       0.96      0.89      0.93        28
 kidneybeans       1.00      1.00      1.00        14
      lentil       0.91      0.91      0.91        23
       maize       1.00      1.00      1.00        21
       mango       1.00      1.00      1.00        26
   mothbeans       0.94      0.89      0.92        19
    mungbean       1.00      1.00      1.00        24
   muskmelon       1.00      1.00      1.00        23
      orange       1.00      1.00      1.00        29
      papaya       1.00      1.00      1.00        19
  pigeonpeas       0.89      0.94      0.92        18
 pomegranate       1.00      1.00      1.00        17
        rice       0.83      0.94      0.88        16
  watermelon       1.00      1.00      1.00        15

    accuracy                           0.97       440
   macro avg       0.97      0.98      0.97       440
weighted avg       0.98      0.97      0.98       440

In [57]:
# Cross validation score (Random Forest)
score = cross_val_score(RF,features,target,cv=5)
score
Out[57]:
array([0.97272727, 0.96136364, 0.97045455, 0.96136364, 0.97045455])
In [58]:
# Dump the trained Naive Bayes classifier with Pickle
RF_pkl_filename = 'RandomForest.pkl'
# Open the file to save as pkl file
RF_Model_pkl = open(RF_pkl_filename, 'wb')
pickle.dump(RF, RF_Model_pkl)
# Close the pickle instances
RF_Model_pkl.close()
In [59]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Ytest, predicted_values)

plt.figure(figsize=(15,15))
sns.heatmap(cm, annot=True, fmt=".0f", linewidths=.5, square = True, cmap = 'Blues');
plt.ylabel('Actual label');
plt.xlabel('Predicted label');
all_sample_title = 'Confusion Matrix - score:'+str(accuracy_score(Ytest,predicted_values))
plt.title(all_sample_title, size = 15);
plt.show()
In [60]:
data = pd.DataFrame([[83, 45, 60, 28, 70.3, 7.0]], columns=['N', 'P','K','temperature', 'humidity', 'ph'])
prediction = RF.predict(data)
print(prediction)
['jute']

KNN¶

In [61]:
from sklearn.neighbors import KNeighborsClassifier
In [62]:
knn = KNeighborsClassifier(n_neighbors= 15, algorithm = 'kd_tree', metric = 'manhattan')
knn.fit(Xtrain,Ytrain)
predicted_values = knn.predict(Xtest)

x = metrics.accuracy_score(Ytest, predicted_values)
acc.append(x)
model.append('KNN')
print("KNN's Accuracy is: ", x)

print(classification_report(Ytest,predicted_values))
KNN's Accuracy is:  0.9318181818181818
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        13
      banana       1.00      1.00      1.00        17
   blackgram       0.64      0.88      0.74        16
    chickpea       1.00      1.00      1.00        21
     coconut       1.00      1.00      1.00        21
      coffee       1.00      1.00      1.00        22
      cotton       0.95      1.00      0.98        20
      grapes       1.00      1.00      1.00        18
        jute       1.00      0.54      0.70        28
 kidneybeans       1.00      1.00      1.00        14
      lentil       0.72      0.78      0.75        23
       maize       1.00      0.95      0.98        21
       mango       1.00      1.00      1.00        26
   mothbeans       0.94      0.89      0.92        19
    mungbean       1.00      1.00      1.00        24
   muskmelon       1.00      1.00      1.00        23
      orange       1.00      1.00      1.00        29
      papaya       1.00      1.00      1.00        19
  pigeonpeas       1.00      0.61      0.76        18
 pomegranate       1.00      1.00      1.00        17
        rice       0.55      1.00      0.71        16
  watermelon       1.00      1.00      1.00        15

    accuracy                           0.93       440
   macro avg       0.95      0.94      0.93       440
weighted avg       0.95      0.93      0.93       440

In [63]:
# Cross validation score (KNN)
score = cross_val_score(knn,features,target,cv=5)
score
Out[63]:
array([0.92727273, 0.95454545, 0.93863636, 0.91590909, 0.91590909])
In [64]:
# Dump the trained KNN classifier with Pickle
knn_pkl_filename = 'knn.pkl'
# Open the file to save as pkl file
knn_Model_pkl = open(knn_pkl_filename, 'wb')
pickle.dump(knn, knn_Model_pkl)
# Close the pickle instances
knn_Model_pkl.close()
In [65]:
cm = confusion_matrix(Ytest, predicted_values)

plt.figure(figsize=(15,15))
sns.heatmap(cm, annot=True, fmt=".0f", linewidths=.5, square = True, cmap = 'Blues');
plt.ylabel('Actual label');
plt.xlabel('Predicted label');
all_sample_title = 'Confusion Matrix - score:'+str(accuracy_score(Ytest,predicted_values))
plt.title(all_sample_title, size = 15);
plt.show()
In [66]:
data = pd.DataFrame([[83, 45, 60, 28, 70.3, 7.0]], columns=['N', 'P','K','temperature', 'humidity', 'ph'])
prediction = knn.predict(data)
print(prediction)
['jute']

XG Boost¶

In [67]:
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()

Ytrain = label_encoder.fit_transform(Ytrain)
Ytest = label_encoder.fit_transform(Ytest)
target = label_encoder.fit_transform(target)

XB = xgb.XGBClassifier()

param_grid = {
    'learning_rate': [0.055],
    'max_depth': [3],
    'n_estimators': [150],
    'subsample': [ 1.0],
    'colsample_bytree': [1.0],
    'reg_alpha': [0.1],
    'reg_lambda': [0.1],
    'min_child_weight': [1]
}

grid_search = GridSearchCV(estimator=XB, param_grid=param_grid, cv=5)

grid_search.fit(Xtrain,Ytrain)

best_model = grid_search.best_estimator_

predicted_values = best_model.predict(Xtest)


x = metrics.accuracy_score(Ytest, predicted_values)
acc.append(x)
model.append('XGBoost')
print("XGBoost's Accuracy is: ", x)
XGBoost's Accuracy is:  0.9704545454545455
In [68]:
# Cross validation score (XGBoost)
score = cross_val_score(best_model,features,target,cv=5)
score
Out[68]:
array([0.96818182, 0.96363636, 0.96363636, 0.96363636, 0.96590909])
In [69]:
cm = confusion_matrix(Ytest, predicted_values)

plt.figure(figsize=(15,15))
sns.heatmap(cm, annot=True, fmt=".0f", linewidths=.5, square = True, cmap = 'Blues');
plt.ylabel('Actual label');
plt.xlabel('Predicted label');
all_sample_title = 'Confusion Matrix - score:'+str(accuracy_score(Ytest,predicted_values))
plt.title(all_sample_title, size = 15);
plt.show()
In [70]:
from sklearn.preprocessing import LabelEncoder
data = pd.DataFrame([[85,58,41,21.770462,80.319644,7.038096]], columns=['N', 'P','K','temperature', 'humidity', 'ph'])
prediction = best_model.predict(data)
print(prediction)
[20]
In [71]:
data = pd.DataFrame([[83, 55, 65, 28, 70.3, 7.0]], columns=['N', 'P','K','temperature', 'humidity', 'ph'])
prediction = best_model.predict(data)
print(prediction)
[5]